package com.yjjxt

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Spark job that counts, per site, the number of distinct visitors (UV) found in an access log.
 *
 * Every input line is split on single whitespace characters and is only used when it yields
 * exactly 7 fields. Sample record:
 *
 * 125.14.22.205	广西	2017-10-10	1512012331013	523042367028986550	www.mi.com	Buy
 *
 * Field 5 (0-based) is used as the grouping key; in the sample it is the site domain.
 * Field 4 is the value that gets de-duplicated per key; in the sample it looks like a
 * user/session id -- NOTE(review): confirm against the data producer.
 */
object Hello23PvUv {

  /**
   * Runs the UV computation and prints one `uv=>site:count` line per site.
   *
   * @param args optional; `args(0)` overrides the input path. When absent the bundled
   *             `src/main/resources/pvuvdata` file is read, as before.
   */
  def main(args: Array[String]): Unit = {
    // Fall back to the original hard-coded location so argument-less runs are unchanged.
    val inputPath = args.headOption.getOrElse("src/main/resources/pvuvdata")
    // Records with any other number of fields are dropped as malformed.
    val expectedFields = 7

    // Create the SparkContext (local master, time-stamped application name).
    val conf = new SparkConf().setMaster("local").setAppName("Join" + System.currentTimeMillis())
    val sparkContext = new SparkContext(conf)

    try {
      // Read the raw log lines.
      val lines: RDD[String] = sparkContext.textFile(inputPath)

      // PV (page views per site), kept disabled as in the original:
      //    val pv: collection.Map[String, Long] = lines.filter(_.split("\\s").length == 7).map(_.split("\\s")(5)).countByValue()
      //    pv.foreach(ele => println("pv=>" + ele._1 + ":" + ele._2))

      // UV: split each line once, keep well-formed records, reduce them to unique
      // (site, visitor) pairs, then count how many pairs remain per site.
      val uv: collection.Map[String, Long] = lines
        .map(_.split("\\s"))
        .filter(_.length == expectedFields)
        .map(fields => (fields(5), fields(4)))
        .distinct()
        .countByKey()

      uv.foreach { case (site, visitors) => println(s"uv=>$site:$visitors") }
    } finally {
      // Always release the context, even when the job above fails.
      sparkContext.stop()
    }
  }
}
